A Perl script to convert from Bo Leuf's Cluster Wiki markup syntax to Creole — more or less! (^_^)
Here's what the code tries to do:
- strip off any Cluster Wiki headers and footers
- change quintuple-tics into properly-nested bold-italic markup
- change triple-tics into double-asterisks (bold)
- change double-tics into double-slashes (italics)
- remove Cluster Wiki markup that indicates small and big type size
- change #+ ... +# to ^^ ... ^^ (superscript)
- change #- ... -# to ,, ... ,, (subscript)
- change $/ to \\ (forced linebreak)
- change double-double-quotes at line-beginning into single colons (blockquotes)
- change !! at beginning of line into == (subhead titles), and likewise for other header levels
- change double-pipes into single-pipes (tables)
- put double-square-brackets around WikiWords
- change [sic] ... [/sic] into triple-curly-braces
- put triple-curly-braces around blocks of lines that begin with spaces
Here are things that the conversion code does NOT attempt to handle:
- changing [esc]...[/esc] into !... or ~... or ?
- fixing definition lists (which in Cluster Wiki use ": ... : ..." syntax)
#! /usr/bin/perl
# convert_to_creole.prl version 0.1 --- ^z --- 25 Dec 2007, 4-5 Jan 2008, ...
# usage: perl convert_to_creole.prl indir outdir
#
# take all files in "indir", do a bunch of pattern transformations
# globally in every file, store results in "outdir"
# indir and outdir must already exist
use strict;
use warnings;

# convert_to_creole($body)
#
# Translate the text of one page from Cluster Wiki markup to Creole markup
# and return the converted text.  The substitutions are order-sensitive:
# longer tic/bang runs must be handled before shorter ones, and WikiWord
# linking happens before [sic] and indented blocks are wrapped in {{{ }}}.
#
#   $body   - raw page contents as read from disk (byte string)
#   returns - the converted page text
sub convert_to_creole {
    my ($body) = @_;
    $body = '' unless defined $body;

    # Delete Cluster Wiki headers and footers (fields delimited by byte 0xB3).
    # NOTE(review): without /s these only strip a header that sits on the
    # first line and a footer that sits on the last line -- presumably that
    # matches the on-disk page format; confirm before adding /s.
    $body =~ s/^.*\xB3text\xB3//;
    $body =~ s/\xB3.*$//;

    # Put a newline at the beginning so that "start of line" can always be
    # matched as "\n" below, even for the very first line.
    $body = "\n$body";

    # Inline emphasis: longest tic runs first so they nest properly.
    $body =~ s{'''''(.*?)'''''}{**//$1//**}gs;    # nested bold-italics
    $body =~ s{'''(.*?)'''}{**$1**}gs;            # '''stuff''' -> **stuff**
    $body =~ s{''(.*?)''}{//$1//}gs;              # ''stuff''   -> //stuff//

    # Small/big type-size markers are simply dropped.
    $body =~ s{\[-(.*?)-\]}{$1}gs;                # [- stuff -] -> stuff
    $body =~ s{\[\+(.*?)\+\]}{$1}gs;              # [+ stuff +] -> stuff

    $body =~ s{\#\+(.*?)\+\#}{^^$1^^}gs;          # #+ ... +# -> ^^ ... ^^
    $body =~ s{\#-(.*?)-\#}{,,$1,,}gs;            # #- ... -# -> ,, ... ,,
    $body =~ s{\$/}{\\\\}g;                       # $/ -> \\ (forced linebreak)
    $body =~ s{\n""}{\n:}g;                       # ""stuff -> :stuff (line start)

    # Headings: a run of 2..6 bangs at line start becomes the same number of
    # equals signs.  The greedy match takes at most six, and any further
    # bangs are left alone, exactly as separate longest-first rules would.
    $body =~ s{\n(!{2,6})}{"\n" . ('=' x length $1)}ge;

    $body =~ s{\|\|}{|}g;                         # || -> | (table syntax fix)

    # WikiWord -> [[WikiWord]]
    $body =~ s{([A-Z][a-z0-9]+(?:[A-Z][a-z0-9]+)+)}{[[${1}]]}g;

    $body =~ s/\[sic\]/{{{/g;                     # [sic]  -> {{{
    $body =~ s/\[\/sic\]/}}}/g;                   # [/sic] -> }}}

    # Wrap each run of consecutive lines that begin with a space in {{{ }}}.
    $body =~ s/((?:\n [^\n]*)+)/\n{{{$1\n}}}/g;

    # Remove the newline that was added to the beginning.
    $body =~ s/\A\n//;

    return $body;
}

# Main program: convert every non-dot file in "indir" and write the result
# under the same name in "outdir".  Both directories must already exist.
print "Converting to Creole:\n";

@ARGV >= 2 or die "usage: perl convert_to_creole.prl indir outdir\n";
my ($indir, $outdir) = @ARGV;

opendir(my $in_dh, $indir)
    or die "couldn't open input directory $indir: $!";
opendir(my $out_dh, $outdir)
    or die "couldn't open output directory $outdir: $!";
closedir($out_dh);    # only opened to verify that outdir exists

my @pages = grep { !/^\./ } readdir $in_dh;
closedir($in_dh);

foreach my $page (@pages) {
    my $in_path  = "$indir/$page";
    my $out_path = "$outdir/$page";

    -e $in_path or die "$page didn't exist: $!";
    next unless -f $in_path;    # skip sub-directories and other non-files

    # Three-argument open so that odd page names cannot be taken as modes.
    open(my $in_fh, '<', $in_path) or die "$page: $!";
    print " $page ... ";
    my $body = do { local $/; <$in_fh> };    # grab entire file at once
    close($in_fh);
    defined $body or die "$page: read failed: $!";

    open(my $out_fh, '>', $out_path) or die "$page: $!";
    print {$out_fh} convert_to_creole($body);
    # Buffered write errors only surface at close, so check it.
    close($out_fh) or die "$page: $!";

    print "\n";
}
(correlates: Multiword link, SnipPattern01SourceCode, ZhurnalyWiki, ...)